library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(haven)
library(phyloseq)
library(ggsci)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
library(uwot)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(ggforce)
library(glue)
library(easystats)
## # Attaching packages: easystats 0.5.2.8 (red = needs update)
## ✔ bayestestR 0.13.0 ✔ correlation 0.8.3
## ✖ datawizard 0.6.4 ✔ effectsize 0.8.2
## ✔ insight 0.18.8 ✔ modelbased 0.8.5
## ✔ performance 0.10.1 ✔ parameters 0.20.0
## ✔ report 0.5.5.1 ✔ see 0.7.4
##
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(ggrepel)
library(gamlss)
## Loading required package: splines
## Loading required package: gamlss.data
##
## Attaching package: 'gamlss.data'
##
## The following object is masked from 'package:datasets':
##
## sleep
##
## Loading required package: gamlss.dist
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## Loading required package: parallel
## ********** GAMLSS Version 5.4-10 **********
## For more on GAMLSS look at https://www.gamlss.com/
## Type gamlssNews() to see new features/changes/bug fixes.
library(emmeans)
library(mixOmics)
##
## Loaded mixOmics 6.22.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us: citation('mixOmics')
##
##
## Attaching package: 'mixOmics'
##
## The following object is masked from 'package:purrr':
##
## map
source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")
setwd("/storage/mariaInsenser/")
files <- dir("./reports/", pattern = "report", full.names = T)
reports <- import_kraken(files,20)
## Loading required package: doParallel
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
metadata <- read_tsv("LastMetadata.tsv") %>%
mutate(Dieta = as.factor(Dieta)) %>%
mutate(Sexo = as.factor(Sexo)) %>%
mutate(Tratamiento = as.factor(Tratamiento)) %>%
mutate(Castrado = as.factor(Castrado))
## Registered S3 method overwritten by 'bit':
## method from
## print.ri gamlss
## Rows: 234 Columns: 94
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): Sample, Position, Run, Code_Sample, Description, GRUPO, Madre
## dbl (87): NumReads, Code_Microbiota, Studio, Orden, Heces, Grupos, Grupo3, D...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- metadata %>% mutate(Description = ifelse(grepl("solo",Description),"Destete",Description))
design <- read_tsv("DesignStudy")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Group, Sex, Diet, Treatment
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table <- reports %>%
distinct() %>%
inner_join(metadata)
## Joining, by = "Sample"
Distribucion de reads en cada grupo
table %>% mutate(Studio = as.factor(Studio)) %>%
filter(Name =="root") %>%
ggplot(aes(x=GRUPO, y = Reads, fill = Studio)) +
geom_boxplot() + facet_wrap(Studio ~ ., scale = "free_x")
Numero de reasd classificados y no clasificados
table %>%
filter(depth ==1) %>%
mutate(Name2 = gsub("root","classified",Name2)) %>%
distinct() %>%
ggplot(aes(x=Sample, y = Prop, fill = Name2)) +
geom_col() +
coord_flip() + scale_fill_d3()
Machos vs Hembras (Dieta NO, Tratamiento Control, Studio 3, Castrados no)
formula: ~ Sexo
Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Castrado == 0) %>%
filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Castrado == 0) %>%
filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Sexo, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo (formula: DiversityValue ~ Sexo). The model's explanatory power is
## moderate (R2 = 0.20). The model's intercept, corresponding to Sexo = 0, is at
## 0.93 (95% CI [0.92, 0.94], t(12) = 169.16, p < .001). Within this model:
##
## - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-1.90e-03, 0.03], t(12) = 1.72, p = 0.086; Std. beta = 0.86, 95%
## CI [-0.12, 1.83])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(12) | p
## -----------------------------------------------------------------------
## Sexo0 | Sexo1 | -0.01 | [-0.03, 0.00] | 7.80e-03 | -1.72 | 0.112
##
## Marginal contrasts estimated at Sexo
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Sexo, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo (formula: DiversityValue ~ Sexo). The model's explanatory power is
## substantial (R2 = 0.47). The model's intercept, corresponding to Sexo = 0, is
## at 3.23 (95% CI [3.08, 3.37], t(12) = 43.73, p < .001). Within this model:
##
## - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.13, 0.54], t(12) = 3.25, p = 0.001; Std. beta = 1.32, 95% CI
## [0.52, 2.11])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(12) | p
## --------------------------------------------------------------------
## Sexo0 | Sexo1 | -0.34 | [-0.57, -0.11] | 0.10 | -3.25 | 0.007
##
## Marginal contrasts estimated at Sexo
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Sexo) %>%
mutate(Sexo = as.factor(Sexo)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Sexo, dist %>%
dplyr::select(Sample,Sexo) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Sexo, data = dist %>% dplyr::select(Sample, Sexo) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Sexo 1 0.22594 0.16207 2.3209 0.067 .
## Residual 12 1.16819 0.83793
## Total 13 1.39413 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Sexo,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = "Sexo", data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion1.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light()
## Joining, by = "NCBI"
## Warning: Removed 878 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Machos vs Hembras (Dieta SI/NO, Tratamiento Control, Studio 3, Castrados no) formula: ~Sexo * Dieta Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Castrado == 0) %>%
filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Castrado == 0) %>%
filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Sexo*Dieta, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and Dieta (formula: DiversityValue ~ Sexo * Dieta). The model's
## explanatory power is weak (R2 = 0.13). The model's intercept, corresponding to
## Sexo = 0 and Dieta = 0, is at 0.93 (95% CI [0.92, 0.95], t(29) = 141.21, p <
## .001). Within this model:
##
## - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-4.99e-03, 0.03], t(29) = 1.43, p = 0.154; Std. beta = 0.75, 95%
## CI [-0.28, 1.78])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-6.24e-03, 0.03], t(29) = 1.24, p = 0.216; Std. beta = 0.60, 95%
## CI [-0.35, 1.55])
## - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.02, 95% CI [-0.05, -7.52e-04], t(29) = -2.02, p = 0.043; Std. beta =
## -1.40, 95% CI [-2.76, -0.04])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(29) | p
## ------------------------------------------------------------------------------------
## Sexo0 Dieta0 | Sexo0 Dieta1 | -0.01 | [-0.04, 0.01] | 8.62e-03 | -1.24 | 0.802
## Sexo0 Dieta0 | Sexo1 Dieta0 | -0.01 | [-0.04, 0.01] | 9.35e-03 | -1.43 | 0.802
## Sexo0 Dieta0 | Sexo1 Dieta1 | 9.24e-04 | [-0.02, 0.03] | 8.82e-03 | 0.10 | > .999
## Sexo0 Dieta1 | Sexo1 Dieta1 | 0.01 | [-0.01, 0.03] | 8.04e-03 | 1.44 | 0.802
## Sexo1 Dieta0 | Sexo0 Dieta1 | 2.68e-03 | [-0.02, 0.03] | 8.62e-03 | 0.31 | > .999
## Sexo1 Dieta0 | Sexo1 Dieta1 | 0.01 | [-0.01, 0.04] | 8.82e-03 | 1.62 | 0.699
##
## Marginal contrasts estimated at Sexo, Dieta
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Sexo * Dieta, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and Dieta (formula: DiversityValue ~ Sexo * Dieta). The model's
## explanatory power is moderate (R2 = 0.19). The model's intercept, corresponding
## to Sexo = 0 and Dieta = 0, is at 3.23 (95% CI [3.03, 3.42], t(29) = 32.30, p <
## .001). Within this model:
##
## - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.06, 0.62], t(29) = 2.40, p = 0.016; Std. beta = 1.21, 95% CI
## [0.22, 2.20])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.18, 95% CI [-0.07, 0.44], t(29) = 1.42, p = 0.157; Std. beta = 0.66, 95% CI
## [-0.25, 1.57])
## - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.45, 95% CI [-0.82, -0.09], t(29) = -2.43, p = 0.015; Std. beta =
## -1.62, 95% CI [-2.92, -0.31])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Sexo","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(29) | p
## -------------------------------------------------------------------------------
## Sexo0 Dieta0 | Sexo0 Dieta1 | -0.18 | [-0.55, 0.18] | 0.13 | -1.42 | 0.670
## Sexo0 Dieta0 | Sexo1 Dieta0 | -0.34 | [-0.74, 0.06] | 0.14 | -2.40 | 0.138
## Sexo0 Dieta0 | Sexo1 Dieta1 | -0.07 | [-0.45, 0.31] | 0.13 | -0.54 | 0.732
## Sexo0 Dieta1 | Sexo1 Dieta1 | 0.11 | [-0.23, 0.46] | 0.12 | 0.93 | 0.732
## Sexo1 Dieta0 | Sexo0 Dieta1 | 0.15 | [-0.21, 0.52] | 0.13 | 1.19 | 0.732
## Sexo1 Dieta0 | Sexo1 Dieta1 | 0.27 | [-0.11, 0.64] | 0.13 | 2.01 | 0.270
##
## Marginal contrasts estimated at Sexo, Dieta
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Sexo,Dieta) %>%
mutate(Sexo = as.factor(Sexo)) %>%
mutate(Dieta = as.factor(Dieta)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo,Dieta)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Sexo * Dieta, dist %>%
dplyr::select(Sample,Sexo,Dieta) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Sexo * Dieta, data = dist %>% dplyr::select(Sample, Sexo, Dieta) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Sexo 1 0.12868 0.04101 1.6564 0.139
## Dieta 1 0.54336 0.17315 6.9940 0.001 ***
## Sexo:Dieta 1 0.21306 0.06790 2.7425 0.039 *
## Residual 29 2.25297 0.71795
## Total 32 3.13807 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,Dieta,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Sexo * Dieta,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = c("Sexo","Dieta"), data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion2.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 5607 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 41 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 34 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Machos vs Machos Castrados (Dieta NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 1) %>%
filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 1) %>%
filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Castrado, data = div %>%
filter(DiversityIndex == "Simpson")%>%
mutate(Sexo = as.factor(Sexo)),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado (formula: DiversityValue ~ Castrado). The model's explanatory power is
## very weak (R2 = 8.92e-03). The model's intercept, corresponding to Castrado =
## 0, is at 0.95 (95% CI [0.93, 0.96], t(13) = 128.51, p < .001). Within this
## model:
##
## - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 3.45e-03, 95% CI [-0.02, 0.02], t(13) = 0.34, p = 0.732; Std. beta =
## 0.18, 95% CI [-0.87, 1.23])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(13) | p
## -------------------------------------------------------------------------
## Castrado0 | Castrado1 | -3.45e-03 | [-0.03, 0.02] | 0.01 | -0.34 | 0.738
##
## Marginal contrasts estimated at Castrado
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Castrado, data = div %>%
filter(DiversityIndex == "Shannon")%>%
mutate(Sexo = as.factor(Sexo)),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado (formula: DiversityValue ~ Castrado). The model's explanatory power is
## very weak (R2 = 6.68e-03). The model's intercept, corresponding to Castrado =
## 0, is at 3.57 (95% CI [3.35, 3.78], t(13) = 32.42, p < .001). Within this
## model:
##
## - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 0.04, 95% CI [-0.25, 0.34], t(13) = 0.30, p = 0.767; Std. beta = 0.16,
## 95% CI [-0.89, 1.21])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(13) | p
## -------------------------------------------------------------------------
## Castrado0 | Castrado1 | -0.04 | [-0.37, 0.28] | 0.15 | -0.30 | 0.772
##
## Marginal contrasts estimated at Castrado
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Castrado) %>%
mutate(Castrado = as.factor(Castrado)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Castrado)) %>% pull(Castrado)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Castrado, dist %>%
dplyr::select(Sample,Castrado) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Castrado, data = dist %>% dplyr::select(Sample, Castrado) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Castrado 1 0.0457 0.03321 0.4465 0.827
## Residual 13 1.3304 0.96679
## Total 14 1.3761 1.00000
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Castrado,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Castrado,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = "Castrado", data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion3.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light()
## Joining, by = "NCBI"
## Warning: Removed 1065 rows containing missing values (`geom_text_repel()`).
Hembras Tratamiento control vs Hembras Tratamiento (Dieta NO, Studio3, Machos NO) formula: ~Tratamiento
Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 0) %>%
#filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 0) %>%
#filter(Tratamiento ==0) %>%
filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Tratamiento, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento (formula: DiversityValue ~ Tratamiento). The model's explanatory
## power is substantial (R2 = 0.72). The model's intercept, corresponding to
## Tratamiento = 0, is at 0.93 (95% CI [0.93, 0.94], t(15) = 334.92, p < .001).
## Within this model:
##
## - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.02, 95% CI [0.02, 0.03], t(15) = 6.24, p < .001; Std. beta = 1.67, 95% CI
## [1.15, 2.20])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(15) | p
## -------------------------------------------------------------------------
## (Intercept) | 0.93 | 2.79e-03 | [0.93, 0.94] | 334.92 | < .001
## Tratamiento [1] | 0.02 | 3.63e-03 | [0.02, 0.03] | 6.24 | < .001
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(15) | p
## -------------------------------------------------------------------------------------
## Tratamiento0 | Tratamiento1 | -0.02 | [-0.03, -0.01] | 3.63e-03 | -6.24 | < .001
##
## Marginal contrasts estimated at Tratamiento
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Tratamiento, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento (formula: DiversityValue ~ Tratamiento). The model's explanatory
## power is substantial (R2 = 0.63). The model's intercept, corresponding to
## Tratamiento = 0, is at 3.23 (95% CI [3.10, 3.35], t(15) = 48.80, p < .001).
## Within this model:
##
## - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.43, 95% CI [0.27, 0.60], t(15) = 5.04, p < .001; Std. beta = 1.56, 95% CI
## [0.95, 2.17])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(15) | p
## --------------------------------------------------------------------
## (Intercept) | 3.23 | 0.07 | [3.10, 3.35] | 48.80 | < .001
## Tratamiento [1] | 0.43 | 0.09 | [0.27, 0.60] | 5.04 | < .001
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(15) | p
## ---------------------------------------------------------------------------------
## Tratamiento0 | Tratamiento1 | -0.43 | [-0.62, -0.25] | 0.09 | -5.04 | < .001
##
## Marginal contrasts estimated at Tratamiento
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Tratamiento) %>%
mutate(Tratamiento = as.factor(Tratamiento)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Tratamiento)) %>% pull(Tratamiento)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Tratamiento, dist %>%
dplyr::select(Sample,Tratamiento) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Tratamiento, data = dist %>% dplyr::select(Sample, Tratamiento) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Tratamiento 1 0.15876 0.12138 2.0722 0.08 .
## Residual 15 1.14922 0.87862
## Total 16 1.30798 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Tratamiento,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Tratamiento,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = "Tratamiento", data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion3.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light()
## Joining, by = "NCBI"
## Warning: Removed 1026 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Machos vs Machos Castrados (Dieta SI/NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado * Dieta
Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 1) %>%
filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 1) %>%
filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Castrado * Dieta, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado and Dieta (formula: DiversityValue ~ Castrado * Dieta). The model's
## explanatory power is moderate (R2 = 0.15). The model's intercept, corresponding
## to Castrado = 0 and Dieta = 0, is at 0.95 (95% CI [0.93, 0.96], t(28) = 135.78,
## p < .001). Within this model:
##
## - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 3.40e-03, 95% CI [-0.02, 0.02], t(28) = 0.36, p = 0.722; Std. beta =
## 0.18, 95% CI [-0.81, 1.16])
## - The effect of Dieta [1] is statistically non-significant and negative (beta =
## -0.01, 95% CI [-0.03, 3.73e-03], t(28) = -1.56, p = 0.119; Std. beta = -0.76,
## 95% CI [-1.72, 0.20])
## - The effect of Castrado [1] × Dieta [1] is statistically non-significant and
## positive (beta = 2.16e-03, 95% CI [-0.02, 0.03], t(28) = 0.16, p = 0.869; Std.
## beta = 0.11, 95% CI [-1.24, 1.46])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(28) | p
## -----------------------------------------------------------------------------------
## (Intercept) | 0.95 | 6.97e-03 | [ 0.93, 0.96] | 135.78 | < .001
## Castrado [1] | 3.40e-03 | 9.55e-03 | [-0.02, 0.02] | 0.36 | 0.722
## Dieta [1] | -0.01 | 9.30e-03 | [-0.03, 0.00] | -1.56 | 0.119
## Castrado [1] × Dieta [1] | 2.16e-03 | 0.01 | [-0.02, 0.03] | 0.16 | 0.869
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(28) | p
## --------------------------------------------------------------------------------------------
## Castrado0 Dieta0 | Castrado0 Dieta1 | 0.01 | [-0.01, 0.04] | 9.30e-03 | 1.56 | 0.651
## Castrado0 Dieta0 | Castrado1 Dieta0 | -3.40e-03 | [-0.03, 0.02] | 9.55e-03 | -0.36 | > .999
## Castrado0 Dieta0 | Castrado1 Dieta1 | 8.94e-03 | [-0.02, 0.04] | 9.55e-03 | 0.94 | > .999
## Castrado0 Dieta1 | Castrado1 Dieta1 | -5.56e-03 | [-0.03, 0.02] | 8.97e-03 | -0.62 | > .999
## Castrado1 Dieta0 | Castrado0 Dieta1 | 0.02 | [-0.01, 0.04] | 8.97e-03 | 2.00 | 0.334
## Castrado1 Dieta0 | Castrado1 Dieta1 | 0.01 | [-0.01, 0.04] | 9.23e-03 | 1.34 | 0.768
##
## Marginal contrasts estimated at Castrado, Dieta
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Castrado * Dieta, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Castrado and Dieta (formula: DiversityValue ~ Castrado * Dieta). The model's
## explanatory power is moderate (R2 = 0.19). The model's intercept, corresponding
## to Castrado = 0 and Dieta = 0, is at 3.57 (95% CI [3.36, 3.78], t(28) = 33.41,
## p < .001). Within this model:
##
## - The effect of Castrado [1] is statistically non-significant and positive
## (beta = 0.04, 95% CI [-0.24, 0.33], t(28) = 0.29, p = 0.775; Std. beta = 0.14,
## 95% CI [-0.82, 1.10])
## - The effect of Dieta [1] is statistically non-significant and negative (beta =
## -0.27, 95% CI [-0.55, 9.79e-03], t(28) = -1.89, p = 0.059; Std. beta = -0.90,
## 95% CI [-1.84, 0.03])
## - The effect of Castrado [1] × Dieta [1] is statistically non-significant and
## positive (beta = 0.06, 95% CI [-0.33, 0.46], t(28) = 0.31, p = 0.755; Std. beta
## = 0.21, 95% CI [-1.11, 1.53])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(28) | p
## ------------------------------------------------------------------------------
## (Intercept) | 3.57 | 0.11 | [ 3.36, 3.78] | 33.41 | < .001
## Castrado [1] | 0.04 | 0.15 | [-0.24, 0.33] | 0.29 | 0.775
## Dieta [1] | -0.27 | 0.14 | [-0.55, 0.01] | -1.89 | 0.059
## Castrado [1] × Dieta [1] | 0.06 | 0.20 | [-0.33, 0.46] | 0.31 | 0.755
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Castrado","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(28) | p
## ---------------------------------------------------------------------------------------
## Castrado0 Dieta0 | Castrado0 Dieta1 | 0.27 | [-0.13, 0.67] | 0.14 | 1.89 | 0.345
## Castrado0 Dieta0 | Castrado1 Dieta0 | -0.04 | [-0.46, 0.37] | 0.15 | -0.29 | 0.908
## Castrado0 Dieta0 | Castrado1 Dieta1 | 0.16 | [-0.25, 0.58] | 0.15 | 1.13 | 0.806
## Castrado0 Dieta1 | Castrado1 Dieta1 | -0.10 | [-0.49, 0.29] | 0.14 | -0.76 | 0.908
## Castrado1 Dieta0 | Castrado0 Dieta1 | 0.31 | [-0.08, 0.70] | 0.14 | 2.27 | 0.188
## Castrado1 Dieta0 | Castrado1 Dieta1 | 0.21 | [-0.19, 0.61] | 0.14 | 1.46 | 0.618
##
## Marginal contrasts estimated at Castrado, Dieta
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Castrado,Dieta) %>%
mutate(Castrado = as.factor(Castrado)) %>%
mutate(Dieta = as.factor(Dieta)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Castrado)) %>% pull(Castrado)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Dieta * Castrado, dist %>%
dplyr::select(Sample,Dieta,Castrado) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Dieta * Castrado, data = dist %>% dplyr::select(Sample, Dieta, Castrado) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Dieta 1 1.0342 0.29052 12.5845 0.001 ***
## Castrado 1 0.0725 0.02035 0.8817 0.475
## Dieta:Castrado 1 0.1521 0.04273 1.8511 0.125
## Residual 28 2.3010 0.64639
## Total 31 3.5597 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Castrado,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Dieta * Castrado,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = c("Dieta","Castrado"), data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion4.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 6188 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Hembras Tratamiento control vs Hembras Tratamiento (Dieta SI/NO, Studio3, Machos NO) forumula: ~Tratamiento * Dieta
Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(Name2 == "Bacteria") %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 0) %>%
#filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table <- table %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo == 0) %>%
#filter(Tratamiento ==0) %>%
#filter(Dieta ==0) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Tratamiento * Dieta, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento and Dieta (formula: DiversityValue ~ Tratamiento * Dieta). The
## model's explanatory power is moderate (R2 = 0.23). The model's intercept,
## corresponding to Tratamiento = 0 and Dieta = 0, is at 0.93 (95% CI [0.92,
## 0.94], t(32) = 165.91, p < .001). Within this model:
##
## - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.02, 95% CI [8.36e-03, 0.04], t(32) = 3.10, p = 0.002; Std. beta = 1.40, 95%
## CI [0.51, 2.28])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-3.28e-03, 0.03], t(32) = 1.51, p = 0.130; Std. beta = 0.68, 95%
## CI [-0.20, 1.57])
## - The effect of Tratamiento [1] × Dieta [1] is statistically significant and
## negative (beta = -0.02, 95% CI [-0.04, -1.37e-03], t(32) = -2.10, p = 0.036;
## Std. beta = -1.29, 95% CI [-2.50, -0.08])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(32) | p
## ---------------------------------------------------------------------------------------
## (Intercept) | 0.93 | 5.63e-03 | [ 0.92, 0.94] | 165.91 | < .001
## Tratamiento [1] | 0.02 | 7.34e-03 | [ 0.01, 0.04] | 3.10 | 0.002
## Dieta [1] | 0.01 | 7.34e-03 | [ 0.00, 0.03] | 1.51 | 0.130
## Tratamiento [1] × Dieta [1] | -0.02 | 0.01 | [-0.04, 0.00] | -2.10 | 0.036
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(32) | p
## --------------------------------------------------------------------------------------------------
## Tratamiento0 Dieta0 | Tratamiento0 Dieta1 | -0.01 | [-0.03, 0.01] | 7.34e-03 | -1.51 | 0.450
## Tratamiento0 Dieta0 | Tratamiento1 Dieta0 | -0.02 | [-0.04, 0.00] | 7.34e-03 | -3.10 | 0.024
## Tratamiento0 Dieta0 | Tratamiento1 Dieta1 | -0.01 | [-0.03, 0.01] | 7.50e-03 | -1.71 | 0.450
## Tratamiento0 Dieta1 | Tratamiento1 Dieta1 | -1.71e-03 | [-0.02, 0.02] | 6.84e-03 | -0.25 | 0.805
## Tratamiento1 Dieta0 | Tratamiento0 Dieta1 | 0.01 | [-0.01, 0.03] | 6.66e-03 | 1.75 | 0.450
## Tratamiento1 Dieta0 | Tratamiento1 Dieta1 | 9.93e-03 | [-0.01, 0.03] | 6.84e-03 | 1.45 | 0.450
##
## Marginal contrasts estimated at Tratamiento, Dieta
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Tratamiento * Dieta, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Tratamiento and Dieta (formula: DiversityValue ~ Tratamiento * Dieta). The
## model's explanatory power is substantial (R2 = 0.33). The model's intercept,
## corresponding to Tratamiento = 0 and Dieta = 0, is at 3.22 (95% CI [3.06,
## 3.39], t(32) = 37.53, p < .001). Within this model:
##
## - The effect of Tratamiento [1] is statistically significant and positive (beta
## = 0.43, 95% CI [0.21, 0.65], t(32) = 3.87, p < .001; Std. beta = 1.63, 95% CI
## [0.81, 2.46])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.19, 95% CI [-0.03, 0.41], t(32) = 1.70, p = 0.088; Std. beta = 0.72, 95% CI
## [-0.11, 1.55])
## - The effect of Tratamiento [1] × Dieta [1] is statistically significant and
## negative (beta = -0.43, 95% CI [-0.73, -0.13], t(32) = -2.82, p = 0.005; Std.
## beta = -1.63, 95% CI [-2.76, -0.49])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(32) | p
## ----------------------------------------------------------------------------------
## (Intercept) | 3.22 | 0.09 | [ 3.06, 3.39] | 37.53 | < .001
## Tratamiento [1] | 0.43 | 0.11 | [ 0.21, 0.65] | 3.87 | < .001
## Dieta [1] | 0.19 | 0.11 | [-0.03, 0.41] | 1.70 | 0.088
## Tratamiento [1] × Dieta [1] | -0.43 | 0.15 | [-0.73, -0.13] | -2.82 | 0.005
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("Tratamiento","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(32) | p
## ----------------------------------------------------------------------------------------------
## Tratamiento0 Dieta0 | Tratamiento0 Dieta1 | -0.19 | [-0.51, 0.12] | 0.11 | -1.70 | 0.294
## Tratamiento0 Dieta0 | Tratamiento1 Dieta0 | -0.43 | [-0.75, -0.12] | 0.11 | -3.87 | 0.003
## Tratamiento0 Dieta0 | Tratamiento1 Dieta1 | -0.19 | [-0.51, 0.13] | 0.11 | -1.68 | 0.294
## Tratamiento0 Dieta1 | Tratamiento1 Dieta1 | -1.69e-03 | [-0.30, 0.29] | 0.10 | -0.02 | 0.987
## Tratamiento1 Dieta0 | Tratamiento0 Dieta1 | 0.24 | [-0.04, 0.53] | 0.10 | 2.38 | 0.117
## Tratamiento1 Dieta0 | Tratamiento1 Dieta1 | 0.24 | [-0.05, 0.53] | 0.10 | 2.30 | 0.117
##
## Marginal contrasts estimated at Tratamiento, Dieta
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Tratamiento,Dieta) %>%
mutate(Tratamiento = as.factor(Tratamiento)) %>%
mutate(Dieta = as.factor(Dieta)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Tratamiento)) %>% pull(Tratamiento)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Dieta * Tratamiento, dist %>%
dplyr::select(Sample,Dieta,Tratamiento) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Dieta * Tratamiento, data = dist %>% dplyr::select(Sample, Dieta, Tratamiento) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Dieta 1 0.36719 0.11681 4.6384 0.003 **
## Tratamiento 1 0.17939 0.05707 2.2661 0.059 .
## Dieta:Tratamiento 1 0.06377 0.02029 0.8056 0.541
## Residual 32 2.53326 0.80584
## Total 35 3.14362 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Tratamiento,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Dieta + Tratamiento,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = c("Dieta","Tratamiento"), data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion4.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 5848 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 34 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 34 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta NO, Tratameinto SI/NO Machos Dieta NO, Tratamiento NO Machos Castrados, Dieta NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal
Creamos la OTU-table para resolver la pregunta.
table2 <- table %>%
mutate(DisfuncionGonadal = 0) %>%
mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))
metadata2 <- metadata %>%
mutate(DisfuncionGonadal = 0) %>%
mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))
samplesGroup <- metadata %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo ==0 & Dieta ==0) %>%
bind_rows(metadata %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo ==1 &Tratamiento ==0 & Dieta==0)) %>%
dplyr::select(Sample)
samplingSize <- table2 %>%
filter(Name2 == "Bacteria") %>%
semi_join(samplesGroup) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
## Joining, by = "Sample"
otu_table <- table2 %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
semi_join(samplesGroup) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
## Joining, by = "Sample"
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata2)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Sexo*DisfuncionGonadal, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and DisfuncionGonadal (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal). The model's explanatory power is substantial (R2 = 0.27).
## The model's intercept, corresponding to Sexo = 0 and DisfuncionGonadal = 0, is
## at 0.93 (95% CI [0.92, 0.94], t(28) = 171.58, p < .001). Within this model:
##
## - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-1.97e-03, 0.03], t(28) = 1.70, p = 0.088; Std. beta = 0.82, 95%
## CI [-0.12, 1.76])
## - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.02, 95% CI [8.60e-03, 0.04], t(28) = 3.17, p = 0.002; Std. beta =
## 1.40, 95% CI [0.54, 2.27])
## - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 1.37e-03], t(28) =
## -1.83, p = 0.068; Std. beta = -1.17, 95% CI [-2.43, 0.09])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(28) | p
## -------------------------------------------------------------------------------------------
## (Intercept) | 0.93 | 5.44e-03 | [ 0.92, 0.94] | 171.58 | < .001
## Sexo [1] | 0.01 | 7.69e-03 | [ 0.00, 0.03] | 1.70 | 0.088
## DisfuncionGonadal [1] | 0.02 | 7.09e-03 | [ 0.01, 0.04] | 3.17 | 0.002
## Sexo [1] × DisfuncionGonadal [1] | -0.02 | 0.01 | [-0.04, 0.00] | -1.83 | 0.068
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(28) | p
## ------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal0 Sexo1 | -0.01 | [-0.03, 0.01] | 7.69e-03 | -1.70 | 0.398
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo0 | -0.02 | [-0.04, 0.00] | 7.09e-03 | -3.17 | 0.022
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo1 | -0.02 | [-0.04, 0.00] | 7.45e-03 | -2.26 | 0.160
## DisfuncionGonadal0 Sexo1 | DisfuncionGonadal1 Sexo1 | -3.71e-03 | [-0.02, 0.02] | 7.45e-03 | -0.50 | 0.825
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal0 Sexo1 | 9.39e-03 | [-0.01, 0.03] | 7.09e-03 | 1.32 | 0.589
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal1 Sexo1 | 5.68e-03 | [-0.01, 0.03] | 6.83e-03 | 0.83 | 0.825
##
## Marginal contrasts estimated at DisfuncionGonadal, Sexo
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Sexo * DisfuncionGonadal, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo and DisfuncionGonadal (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal). The model's explanatory power is substantial (R2 = 0.35).
## The model's intercept, corresponding to Sexo = 0 and DisfuncionGonadal = 0, is
## at 3.22 (95% CI [3.05, 3.40], t(28) = 35.82, p < .001). Within this model:
##
## - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.33, 95% CI [0.09, 0.58], t(28) = 2.63, p = 0.009; Std. beta = 1.19, 95% CI
## [0.30, 2.08])
## - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.43, 95% CI [0.20, 0.66], t(28) = 3.67, p < .001; Std. beta = 1.53,
## 95% CI [0.71, 2.35])
## - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically significant
## and negative (beta = -0.38, 95% CI [-0.72, -0.05], t(28) = -2.25, p = 0.024;
## Std. beta = -1.36, 95% CI [-2.55, -0.18])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(28) | p
## ---------------------------------------------------------------------------------------
## (Intercept) | 3.22 | 0.09 | [ 3.05, 3.40] | 35.82 | < .001
## Sexo [1] | 0.33 | 0.13 | [ 0.09, 0.58] | 2.63 | 0.009
## DisfuncionGonadal [1] | 0.43 | 0.12 | [ 0.20, 0.66] | 3.67 | < .001
## Sexo [1] × DisfuncionGonadal [1] | -0.38 | 0.17 | [-0.72, -0.05] | -2.25 | 0.024
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(28) | p
## ---------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal0 Sexo1 | -0.33 | [-0.70, 0.03] | 0.13 | -2.63 | 0.055
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo0 | -0.43 | [-0.76, -0.10] | 0.12 | -3.67 | 0.006
## DisfuncionGonadal0 Sexo0 | DisfuncionGonadal1 Sexo1 | -0.38 | [-0.73, -0.03] | 0.12 | -3.10 | 0.022
## DisfuncionGonadal0 Sexo1 | DisfuncionGonadal1 Sexo1 | -0.05 | [-0.40, 0.30] | 0.12 | -0.39 | > .999
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal0 Sexo1 | 0.10 | [-0.24, 0.43] | 0.12 | 0.82 | > .999
## DisfuncionGonadal1 Sexo0 | DisfuncionGonadal1 Sexo1 | 0.05 | [-0.27, 0.37] | 0.11 | 0.43 | > .999
##
## Marginal contrasts estimated at DisfuncionGonadal, Sexo
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Sexo,DisfuncionGonadal) %>%
mutate(Sexo = as.factor(Sexo)) %>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,DisfuncionGonadal)) %>% pull(DisfuncionGonadal)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Sexo * DisfuncionGonadal, dist %>%
dplyr::select(Sample,Sexo,DisfuncionGonadal) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Sexo * DisfuncionGonadal, data = dist %>% dplyr::select(Sample, Sexo, DisfuncionGonadal) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Sexo 1 0.18963 0.06585 2.1354 0.055 .
## DisfuncionGonadal 1 0.10742 0.03730 1.2097 0.299
## Sexo:DisfuncionGonadal 1 0.09640 0.03347 1.0855 0.359
## Residual 28 2.48646 0.86338
## Total 31 2.87990 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata2) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,DisfuncionGonadal,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Sexo + DisfuncionGonadal,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = c("Sexo","DisfuncionGonadal"), data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion5.1.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 6023 rows containing missing values (`geom_text_repel()`).
Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta SI/NO, Tratameinto SI/NO Machos Dieta SI/NO, Tratamiento NO Machos Castrados, Dieta SI/NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal * Dieta
Creamos la OTU-table para resolver la pregunta.
table2 <- table %>%
mutate(DisfuncionGonadal = 0) %>%
mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal))%>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))
metadata2 <- metadata %>%
mutate(DisfuncionGonadal = 0) %>%
mutate(DisfuncionGonadal = ifelse(Castrado ==1,1,DisfuncionGonadal)) %>%
mutate(DisfuncionGonadal = ifelse((Sexo ==0 & Tratamiento ==1),1,DisfuncionGonadal))%>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal))
samplesGroup <- metadata %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo ==0) %>%
bind_rows(metadata %>%
filter(Studio ==3) %>%
filter(Description =="Adulta") %>%
filter(Sexo ==1 &Tratamiento ==0)) %>%
dplyr::select(Sample)
samplingSize <- table2 %>%
filter(Name2 == "Bacteria") %>%
semi_join(samplesGroup) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
## Joining, by = "Sample"
otu_table <- table2 %>%
filter(!grepl("S",TaxRank)) %>%
filter(!grepl("U",TaxRank)) %>%
filter(!grepl("R",TaxRank)) %>%
filter(!grepl("D",TaxRank)) %>%
semi_join(samplesGroup) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
## Joining, by = "Sample"
Calculamos la alpha diversity
simpson <- diversity(otu_table, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata2)
## Joining, by = "Sample"
## Joining, by = "Sample"
Análisis del indice de Simpson
mod_gaus <- glm(DiversityValue ~ Sexo*DisfuncionGonadal*Dieta, data = div %>%
filter(DiversityIndex == "Simpson"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo, DisfuncionGonadal and Dieta (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal * Dieta). The model's explanatory power is moderate (R2 =
## 0.20). The model's intercept, corresponding to Sexo = 0, DisfuncionGonadal = 0
## and Dieta = 0, is at 0.93 (95% CI [0.92, 0.95], t(60) = 148.63, p < .001).
## Within this model:
##
## - The effect of Sexo [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-3.90e-03, 0.03], t(60) = 1.52, p = 0.128; Std. beta = 0.77, 95%
## CI [-0.22, 1.76])
## - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.02, 95% CI [6.52e-03, 0.04], t(60) = 2.76, p = 0.006; Std. beta =
## 1.28, 95% CI [0.37, 2.20])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.01, 95% CI [-5.35e-03, 0.03], t(60) = 1.31, p = 0.191; Std. beta = 0.61, 95%
## CI [-0.30, 1.52])
## - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 3.99e-03], t(60) =
## -1.62, p = 0.104; Std. beta = -1.10, 95% CI [-2.42, 0.23])
## - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.03, 95% CI [-0.05, -2.16e-03], t(60) = -2.14, p = 0.032; Std. beta =
## -1.43, 95% CI [-2.74, -0.12])
## - The effect of DisfuncionGonadal [1] × Dieta [1] is statistically
## non-significant and negative (beta = -0.02, 95% CI [-0.04, 1.50e-03], t(60) =
## -1.83, p = 0.068; Std. beta = -1.16, 95% CI [-2.41, 0.09])
## - The effect of (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] is statistically
## non-significant and positive (beta = 0.02, 95% CI [-9.46e-03, 0.05], t(60) =
## 1.38, p = 0.168; Std. beta = 1.28, 95% CI [-0.54, 3.09])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(60) | p
## ----------------------------------------------------------------------------------------------------------
## (Intercept) | 0.93 | 6.28e-03 | [ 0.92, 0.95] | 148.63 | < .001
## Sexo [1] | 0.01 | 8.88e-03 | [ 0.00, 0.03] | 1.52 | 0.128
## DisfuncionGonadal [1] | 0.02 | 8.19e-03 | [ 0.01, 0.04] | 2.76 | 0.006
## Dieta [1] | 0.01 | 8.19e-03 | [-0.01, 0.03] | 1.31 | 0.191
## Sexo [1] × DisfuncionGonadal [1] | -0.02 | 0.01 | [-0.04, 0.00] | -1.62 | 0.104
## Sexo [1] × Dieta [1] | -0.03 | 0.01 | [-0.05, 0.00] | -2.14 | 0.032
## DisfuncionGonadal [1] × Dieta [1] | -0.02 | 0.01 | [-0.04, 0.00] | -1.83 | 0.068
## (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] | 0.02 | 0.02 | [-0.01, 0.05] | 1.38 | 0.168
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(60) | p
## --------------------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | -0.01 | [-0.04, 0.02] | 8.19e-03 | -1.31 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 | -0.01 | [-0.04, 0.02] | 8.88e-03 | -1.52 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 9.09e-04 | [-0.03, 0.03] | 8.37e-03 | 0.11 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta0 | -0.02 | [-0.05, 0.00] | 8.19e-03 | -2.76 | 0.209
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | -0.01 | [-0.04, 0.01] | 8.37e-03 | -1.53 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | -0.02 | [-0.04, 0.01] | 8.60e-03 | -1.95 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | -4.35e-03 | [-0.03, 0.02] | 8.60e-03 | -0.51 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.01 | [-0.01, 0.04] | 7.63e-03 | 1.52 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo0 Dieta1 | -2.12e-03 | [-0.03, 0.02] | 7.63e-03 | -0.28 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | 6.35e-03 | [-0.02, 0.03] | 7.88e-03 | 0.81 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 2.81e-03 | [-0.02, 0.03] | 8.19e-03 | 0.34 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.01 | [-0.01, 0.04] | 8.37e-03 | 1.72 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 6.85e-04 | [-0.03, 0.03] | 8.37e-03 | 0.08 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | -3.28e-03 | [-0.03, 0.02] | 8.60e-03 | -0.38 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 9.16e-03 | [-0.02, 0.04] | 8.60e-03 | 1.06 | > .999
## DisfuncionGonadal0 Sexo1 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | -5.26e-03 | [-0.03, 0.02] | 8.07e-03 | -0.65 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 0.01 | [-0.01, 0.04] | 7.43e-03 | 1.60 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 | 9.06e-03 | [-0.02, 0.04] | 8.19e-03 | 1.11 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.02 | [ 0.00, 0.05] | 7.63e-03 | 3.07 | 0.089
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 9.75e-03 | [-0.02, 0.03] | 7.63e-03 | 1.28 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | 5.78e-03 | [-0.02, 0.03] | 7.88e-03 | 0.73 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.02 | [-0.01, 0.04] | 7.88e-03 | 2.31 | 0.631
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.01 | [-0.01, 0.04] | 7.83e-03 | 1.75 | > .999
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | 8.47e-03 | [-0.02, 0.03] | 8.07e-03 | 1.05 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 6.09e-03 | [-0.02, 0.03] | 7.88e-03 | 0.77 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.02 | [-0.01, 0.04] | 8.07e-03 | 2.19 | 0.807
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 3.97e-03 | [-0.02, 0.03] | 8.07e-03 | 0.49 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.01 | [-0.01, 0.04] | 8.31e-03 | 1.50 | > .999
##
## Marginal contrasts estimated at DisfuncionGonadal, Sexo, Dieta
## p-value adjustment method: Holm (1979)
Análisis del indice de Shannon
mod_gaus <- glm(DiversityValue ~Sexo * DisfuncionGonadal * Dieta, data = div %>%
filter(DiversityIndex == "Shannon"),
family = gaussian)
report(mod_gaus)
## We fitted a linear model (estimated using ML) to predict DiversityValue with
## Sexo, DisfuncionGonadal and Dieta (formula: DiversityValue ~ Sexo *
## DisfuncionGonadal * Dieta). The model's explanatory power is moderate (R2 =
## 0.25). The model's intercept, corresponding to Sexo = 0, DisfuncionGonadal = 0
## and Dieta = 0, is at 3.22 (95% CI [3.04, 3.41], t(60) = 33.52, p < .001).
## Within this model:
##
## - The effect of Sexo [1] is statistically significant and positive (beta =
## 0.34, 95% CI [0.07, 0.61], t(60) = 2.50, p = 0.013; Std. beta = 1.22, 95% CI
## [0.26, 2.18])
## - The effect of DisfuncionGonadal [1] is statistically significant and positive
## (beta = 0.43, 95% CI [0.19, 0.68], t(60) = 3.44, p < .001; Std. beta = 1.55,
## 95% CI [0.66, 2.43])
## - The effect of Dieta [1] is statistically non-significant and positive (beta =
## 0.18, 95% CI [-0.06, 0.43], t(60) = 1.47, p = 0.141; Std. beta = 0.66, 95% CI
## [-0.22, 1.55])
## - The effect of Sexo [1] × DisfuncionGonadal [1] is statistically significant
## and negative (beta = -0.39, 95% CI [-0.75, -0.03], t(60) = -2.14, p = 0.032;
## Std. beta = -1.40, 95% CI [-2.68, -0.12])
## - The effect of Sexo [1] × Dieta [1] is statistically significant and negative
## (beta = -0.45, 95% CI [-0.80, -0.10], t(60) = -2.51, p = 0.012; Std. beta =
## -1.61, 95% CI [-2.88, -0.35])
## - The effect of DisfuncionGonadal [1] × Dieta [1] is statistically significant
## and negative (beta = -0.42, 95% CI [-0.76, -0.09], t(60) = -2.47, p = 0.014;
## Std. beta = -1.52, 95% CI [-2.73, -0.31])
## - The effect of (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] is statistically
## non-significant and positive (beta = 0.48, 95% CI [-6.83e-03, 0.97], t(60) =
## 1.93, p = 0.053; Std. beta = 1.73, 95% CI [-0.02, 3.48])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
parameters(mod_gaus)
## Parameter | Coefficient | SE | 95% CI | t(60) | p
## -----------------------------------------------------------------------------------------------------
## (Intercept) | 3.22 | 0.10 | [ 3.04, 3.41] | 33.52 | < .001
## Sexo [1] | 0.34 | 0.14 | [ 0.07, 0.61] | 2.50 | 0.013
## DisfuncionGonadal [1] | 0.43 | 0.13 | [ 0.19, 0.68] | 3.44 | < .001
## Dieta [1] | 0.18 | 0.13 | [-0.06, 0.43] | 1.47 | 0.141
## Sexo [1] × DisfuncionGonadal [1] | -0.39 | 0.18 | [-0.75, -0.03] | -2.14 | 0.032
## Sexo [1] × Dieta [1] | -0.45 | 0.18 | [-0.80, -0.10] | -2.51 | 0.012
## DisfuncionGonadal [1] × Dieta [1] | -0.42 | 0.17 | [-0.76, -0.09] | -2.47 | 0.014
## (Sexo [1] × DisfuncionGonadal [1]) × Dieta [1] | 0.48 | 0.25 | [-0.01, 0.97] | 1.93 | 0.053
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald t-distribution approximation.
estimate_contrasts(mod_gaus, contrast = c("DisfuncionGonadal","Sexo","Dieta"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | t(60) | p
## -----------------------------------------------------------------------------------------------------------------------
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | -0.18 | [-0.59, 0.23] | 0.13 | -1.47 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 | -0.34 | [-0.78, 0.11] | 0.14 | -2.50 | 0.384
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | -0.07 | [-0.49, 0.34] | 0.13 | -0.58 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta0 | -0.43 | [-0.84, -0.02] | 0.13 | -3.44 | 0.030
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | -0.19 | [-0.61, 0.23] | 0.13 | -1.50 | > .999
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | -0.38 | [-0.81, 0.05] | 0.13 | -2.90 | 0.137
## DisfuncionGonadal0 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | -0.17 | [-0.61, 0.26] | 0.13 | -1.33 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.11 | [-0.27, 0.49] | 0.12 | 0.94 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo0 Dieta1 | -7.61e-03 | [-0.39, 0.37] | 0.12 | -0.07 | > .999
## DisfuncionGonadal0 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.01 | [-0.38, 0.40] | 0.12 | 0.08 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 0.15 | [-0.26, 0.56] | 0.13 | 1.23 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.26 | [-0.15, 0.68] | 0.13 | 2.07 | 0.908
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 0.15 | [-0.27, 0.57] | 0.13 | 1.15 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | -0.04 | [-0.47, 0.39] | 0.13 | -0.32 | > .999
## DisfuncionGonadal0 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.16 | [-0.27, 0.60] | 0.13 | 1.25 | > .999
## DisfuncionGonadal0 Sexo1 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | -0.10 | [-0.50, 0.30] | 0.12 | -0.81 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 0.25 | [-0.13, 0.62] | 0.11 | 2.16 | 0.796
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta0 | 0.09 | [-0.32, 0.50] | 0.13 | 0.73 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.36 | [-0.03, 0.74] | 0.12 | 3.05 | 0.093
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 0.24 | [-0.14, 0.62] | 0.12 | 2.04 | 0.916
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta0 | 0.05 | [-0.35, 0.44] | 0.12 | 0.41 | > .999
## DisfuncionGonadal1 Sexo0 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.26 | [-0.14, 0.65] | 0.12 | 2.12 | 0.834
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.12 | [-0.27, 0.51] | 0.12 | 0.98 | > .999
## DisfuncionGonadal1 Sexo0 Dieta1 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.02 | [-0.39, 0.42] | 0.12 | 0.14 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo0 Dieta1 | 0.20 | [-0.20, 0.59] | 0.12 | 1.63 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal0 Sexo1 Dieta1 | 0.31 | [-0.10, 0.71] | 0.12 | 2.48 | 0.384
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo0 Dieta1 | 0.19 | [-0.22, 0.59] | 0.12 | 1.53 | > .999
## DisfuncionGonadal1 Sexo1 Dieta0 | DisfuncionGonadal1 Sexo1 Dieta1 | 0.21 | [-0.21, 0.62] | 0.13 | 1.62 | > .999
##
## Marginal contrasts estimated at DisfuncionGonadal, Sexo, Dieta
## p-value adjustment method: Holm (1979)
Vamos a calcular la Beta-diversidad.
dist <- avgdist(otu_table %>% t(),sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist %>%
dplyr::select(Sample,Sexo,DisfuncionGonadal,Dieta) %>%
mutate(Sexo = as.factor(Sexo)) %>%
mutate(DisfuncionGonadal = as.factor(DisfuncionGonadal)) %>%
mutate(Dieta = as.factor(Dieta)) %>%
column_to_rownames("Sample"))
X = otu_table %>% t()
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata %>% dplyr::select(Sample,Sexo)) %>% pull(Sexo)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,DisfuncionGonadal)) %>% pull(DisfuncionGonadal)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
Y = otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>% inner_join(metadata2 %>% dplyr::select(Sample,Dieta)) %>% pull(Dieta)
## Joining, by = "Sample"
pl <- splsda(X,Y, ncomp = 10, scale = T)
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(pl, ind.names = T, ellipse = TRUE, legend =TRUE)
dist_ad <-dist %>%
dplyr::select(Sample,contains("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Sexo * DisfuncionGonadal * Dieta, dist %>%
dplyr::select(Sample,Sexo,DisfuncionGonadal,Dieta) %>%
column_to_rownames("Sample"))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_ad ~ Sexo * DisfuncionGonadal * Dieta, data = dist %>% dplyr::select(Sample, Sexo, DisfuncionGonadal, Dieta) %>% column_to_rownames("Sample"))
## Df SumOfSqs R2 F Pr(>F)
## Sexo 1 0.1766 0.02566 2.1919 0.068 .
## DisfuncionGonadal 1 0.2006 0.02913 2.4889 0.047 *
## Dieta 1 1.1689 0.16980 14.5064 0.001 ***
## Sexo:DisfuncionGonadal 1 0.0781 0.01134 0.9692 0.418
## Sexo:Dieta 1 0.2098 0.03047 2.6034 0.031 *
## DisfuncionGonadal:Dieta 1 0.0919 0.01335 1.1403 0.328
## Sexo:DisfuncionGonadal:Dieta 1 0.1237 0.01796 1.5345 0.167
## Residual 60 4.8348 0.70229
## Total 67 6.8843 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
• Qué taxas son las responsables de las diferencias
gamlss_table <- otu_table %>%
t() %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata2) %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Sexo,DisfuncionGonadal,Dieta,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Sexo*Dieta + DisfuncionGonadal,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = c("Sexo","DisfuncionGonadal","Dieta"), data = data) %>%
pairs() %>%
as.data.frame()))
## Joining, by = "Sample"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
write_tsv("Cuestion5.2.tsv")
## Joining, by = "NCBI"
gamlss_table %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(padj = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,padj) %>%
#separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(padj < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(padj), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(~contrast)
## Joining, by = "NCBI"
## Warning: Removed 28234 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 49 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 54 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 67 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 67 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps